import os
import re
import shutil
import subprocess
import sys
from collections import defaultdict
from datetime import datetime
from pathlib import Path

import json

from stethistics.checks.minified import raw_ip_aliases_check, raw_concentration_check, raw_subnet_check
from ..config import TMP_EXPORT_DIRECTORY, API_HOURLY_EXPORT_DIRECTORY, ALIASES_THRESHOLD, GROUPS_THRESHOLD, \
    SUBNETS_THRESHOLD, API_DAILY_EXPORT_DIRECTORY, API__HOURLY_BACKUP_EXPORT_DIRECTORY


def serialize_sets(obj):
    """``json.dump`` *default* hook that encodes sets as lists.

    Args:
        obj: an object the JSON encoder could not serialize natively.

    Returns:
        list: the elements of ``obj`` when it is a (frozen)set.

    Raises:
        TypeError: for any other type, as the ``default`` protocol
            requires.  (Returning ``obj`` unchanged — as this function
            previously did — makes the encoder call the hook again on
            the same object and recurse until RecursionError.)
    """
    if isinstance(obj, (set, frozenset)):
        return list(obj)
    raise TypeError(f"Object of type {type(obj).__name__} is not JSON serializable")

def aggregate(directory, names):
    """Merge the ``up`` node lists of several crawl export files.

    Nodes are identified by their (IP address, UDP port) pair; the
    "Seen node IDs" observed for the same pair across files are unioned.

    Args:
        directory: path prefix the filenames are appended to (expected
            to end with a separator — callers pass trailing-slash paths).
        names: crawl export filenames (JSON) to merge.

    Returns:
        dict with a ``stats`` summary and the merged ``up`` node list.
    """
    seen_by_endpoint = defaultdict(set)
    for name in names:
        with open(directory + name, "r") as fh:
            crawl = json.load(fh)
        for node in crawl['up']:
            endpoint = (node['IP address'], node['UDP port'])
            seen_by_endpoint[endpoint].update(node['Seen node IDs'])

    up_nodes = [
        {'IP address': ip, 'UDP port': port, 'Seen node IDs': tuple(ids)}
        for (ip, port), ids in seen_by_endpoint.items()
    ]
    return {
        "stats": {
            "aggregated_crawls": len(names),
            "original_filenames": names,
            "up_count": len(up_nodes),
        },
        "up": up_nodes,
    }


def tee(x):
    """Identity with a side effect: echo *x* to stdout and return it.

    Handy for peeking at intermediate values inside expression pipelines.
    """
    print(x)
    return x


def aggregate_day():
    """Roll every completed hourly export directory into daily aggregates.

    For each day directory under ``API_HOURLY_EXPORT_DIRECTORY`` (except
    ``latest``), merges the per-hour ``raw_aggregated_results.json`` files,
    writes the merged results plus the alias/group/subnet check outputs
    under ``API_DAILY_EXPORT_DIRECTORY``, then removes the hourly sources.
    """
    try:
        day_names = [x for x in next(os.walk(API_HOURLY_EXPORT_DIRECTORY))[1] if x != "latest"]
    except StopIteration:
        # os.walk yields nothing when the directory does not exist:
        # no hourly exports yet, so there is nothing to aggregate.
        return

    # Map each day directory (trailing slash kept) to the list of its
    # per-hour result files, relative to that day directory.
    values = {API_HOURLY_EXPORT_DIRECTORY + n + "/": [v + "/" + "raw_aggregated_results.json" for v in
                                                      next(os.walk(API_HOURLY_EXPORT_DIRECTORY + n))[1]] for n in day_names}
    # split("/")[-2] recovers the day name from the trailing-slash path.
    output = {k.split("/")[-2]: aggregate(k, v) for k, v in values.items()}

    # Write the daily aggregates first, and only then delete the hourly
    # sources — deleting up front (as before) lost data if a write failed.
    for k, v in output.items():
        path = API_DAILY_EXPORT_DIRECTORY + k + "/"
        Path(path).mkdir(parents=True, exist_ok=True)
        with open(path + 'raw_aggregated_results.json', 'w') as outfile:
            json.dump(v, outfile)
        with open(path + 'aliases.json', 'w') as outfile:
            json.dump(raw_ip_aliases_check(v, ALIASES_THRESHOLD), outfile)
        with open(path + 'groups.json', 'w') as outfile:
            json.dump(raw_concentration_check(v, GROUPS_THRESHOLD), outfile)
        with open(path + 'subnets.json', 'w') as outfile:
            json.dump(raw_subnet_check(v, SUBNETS_THRESHOLD), outfile, default=serialize_sets)

    for n in day_names:
        shutil.rmtree(API_HOURLY_EXPORT_DIRECTORY + n)


def aggregate_hour(current, previous):
    """Aggregate one hour's raw crawl exports and publish the results.

    Merges all timestamped crawl files in ``TMP_EXPORT_DIRECTORY``, runs the
    alias/group/subnet checks, writes the outputs to the hourly export dir,
    its backup, and ``latest/``, updates ``api/stats.json``, removes the
    consumed raw files, and triggers the daily rollup on a day boundary.

    Args:
        current: datetime at the end of the aggregation window.
        previous: datetime whose date/hour names the export directories.
    """
    # Raw exports are named "YYYY-MM-DD HH:MM:SS.mmm.json".
    filename_regex = r"\d{4}-\d{2}-\d{2} \d{2}:\d{2}:\d{2}\.\d{3}\.json\Z"
    names = [n for n in sorted(os.listdir(TMP_EXPORT_DIRECTORY)) if re.match(filename_regex, n)]

    output = aggregate(TMP_EXPORT_DIRECTORY, names)

    aliases_res = raw_ip_aliases_check(output, ALIASES_THRESHOLD)
    groups_res = raw_concentration_check(output, GROUPS_THRESHOLD)
    subnets_res = raw_subnet_check(output, SUBNETS_THRESHOLD)

    # "DD_MM_YYYY/HH/" with a zero-padded hour (replaces the conditional
    # `hour if hour > 9 else f"0{hour}"` — identical output).
    hour_subdir = f'{previous.strftime("%d_%m_%Y")}/{previous.hour:02}/'
    for path in (
            API_HOURLY_EXPORT_DIRECTORY + hour_subdir,
            API__HOURLY_BACKUP_EXPORT_DIRECTORY + hour_subdir,
            API_HOURLY_EXPORT_DIRECTORY + "latest/"):
        Path(path).mkdir(parents=True, exist_ok=True)
        with open(path + 'raw_aggregated_results.json', 'w') as outfile:
            json.dump(output, outfile)
        with open(path + 'aliases.json', 'w') as outfile:
            json.dump(aliases_res, outfile)
        with open(path + 'groups.json', 'w') as outfile:
            json.dump(groups_res, outfile)
        with open(path + 'subnets.json', 'w') as outfile:
            json.dump(subnets_res, outfile, default=serialize_sets)

    try:
        with open('api/stats.json', 'r') as infile:
            stats = json.load(infile)
    except FileNotFoundError:
        # First run: no stats ledger yet, start a fresh one.
        stats = {}
    subnets_count = sum(
        1 for values in subnets_res["results"].values()
        if len(values) >= subnets_res["threshold"])
    stats[previous.strftime("%d_%m_%Y_%H")] = {"up_count": len(output["up"]),
                                               "aliases_count": len(aliases_res["results"]),
                                               "subnets_count": subnets_count,
                                               "groups_count": len(groups_res["results"]),
                                               "aggregated_files": len(names)}
    with open('api/stats.json', 'w') as outfile:
        json.dump(stats, outfile)

    # Best-effort cleanup of the consumed raw exports; a file already
    # removed (or otherwise unreadable) is not worth aborting over.
    for filename in names:
        try:
            os.remove(TMP_EXPORT_DIRECTORY + filename)
        except OSError:
            pass
    if previous.day != current.day:
        aggregate_day()


def feed_api(is_quiet):
    """Crawl continuously, aggregating each completed hour's results.

    Re-invokes this program (``sys.argv[0]``) as a subprocess in a loop
    until the wall-clock hour rolls over, then hands the hour's exports
    to :func:`aggregate_hour`. Never returns.

    Args:
        is_quiet: when true, suppress the progress messages.
    """
    while True:
        hour_started = datetime.utcnow()
        now = hour_started
        if not is_quiet:
            print(f"starting crawls for hour {hour_started.hour}")
        # Keep crawling until the hour changes.
        while now.hour == hour_started.hour:
            subprocess.run([sys.argv[0], '-r', '-e', 'tmp', '-q'])
            now = datetime.utcnow()
        if not is_quiet:
            print("finished, exporting current hour's crawls")
        aggregate_hour(now, hour_started)
